코드 최적화 개요#
강좌: 수치해석 프로젝트
코드 Profiling#
실행 시간, 함수 호출 정보를 측정해서, 계산 시간이 많이 걸리는 Hot-spot 분석
주요 측정 방법#
총 시간 측정
BASH shell :
time
Jupyter :
%time
,%timeit
함수 호출 측정
Python : cProfile
Jupyter :
%prun
라인별 소요 시간 측정
Python : line_profiler
Jupyter :
%lprun
예제#
Jacobi method의 Hot-spot을 찾아라
from scipy import linalg
import numpy as np
def jacobi(n, ti, dt):
"""
Jacobi method
Parameters
----------
n : integer
size
ti : float
current time
dt : array
difference
"""
for i in range(1, n+1):
for j in range(1, n+1):
dt[i, j] = 0.25*(ti[i-1, j] + ti[i, j-1] + ti[i+1, j] + ti[i, j+1]) - ti[i, j]
# Update
ti += dt
def jacobi_v1(n, ti, dt):
"""
Jacobi method (Vector version)
Parameters
----------
ti : float
current time
Parameters
-----------
dt : array
difference
"""
dt[1:-1, 1:-1] = 0.25*(ti[:-2, 1:-1] + ti[1:-1, :-2] + ti[2:, 1:-1] + ti[1:-1, 2:]) - ti[1:-1, 1:-1]
# Update
ti += dt
def solve_laplace(n, solver, tol=1e-5, order='C'):
"""
Laplace Equation solver
Parameters
----------
n : integer
size
solver : function
iterative solver
tol : float
tolerance
order : string
'C' | 'F'
Returns
-------
err : float
residual
"""
ti = np.zeros((n+2, n+2), order=order)
dt = np.zeros((n+2, n+2), order=order)
def bc(t):
t[-1, 1:-1] = 300
t[0, 1:-1] = 100
t[1:-1, -1] = 100
t[1:-1, 0] = 100
err = 1
hist = []
while err > tol:
# Apply BC
bc(ti)
# Run Gauss-Seidel
solver(n, ti, dt)
# Compute Error
err = linalg.norm(dt) / n
return err
코드 실행 시간 측정#
Python loop는 상당히 느림
# Python loop
%time solve_laplace(49, jacobi)
CPU times: user 7.31 s, sys: 1.09 ms, total: 7.31 s
Wall time: 7.31 s
9.997943770878389e-06
# Vector version
%time solve_laplace(49, jacobi_v1)
CPU times: user 103 ms, sys: 0 ns, total: 103 ms
Wall time: 102 ms
9.997943770878389e-06
Profile 결과#
jacobi 함수가 대부분의 계산 시간을 차지함
Vector version에서는 이 시간이 100배 이상 단축됨
%prun -T prof_jacobi.txt solve_laplace(49, jacobi)
print(open('prof_jacobi.txt').read())
*** Profile printout saved to text file 'prof_jacobi.txt'.
102346 function calls (97229 primitive calls) in 8.043 seconds
Ordered by: internal time
ncalls tottime percall cumtime percall filename:lineno(function)
5117 7.978 0.002 7.978 0.002 3378516824.py:1(jacobi)
5117 0.011 0.000 0.022 0.000 linalg.py:2363(norm)
5117 0.010 0.000 0.020 0.000 function_base.py:422(asarray_chkfinite)
10234/5117 0.008 0.000 0.023 0.000 {built-in method numpy.core._multiarray_umath.implement_array_function}
5117 0.007 0.000 0.007 0.000 3211324933.py:24(bc)
5117 0.007 0.000 0.007 0.000 {method 'reduce' of 'numpy.ufunc' objects}
5117 0.006 0.000 0.053 0.000 _misc.py:17(norm)
1 0.004 0.004 8.043 8.043 3211324933.py:1(solve_laplace)
5117 0.002 0.000 0.026 0.000 <__array_function__ internals>:2(norm)
5117 0.002 0.000 0.008 0.000 <__array_function__ internals>:2(dot)
5117 0.001 0.000 0.010 0.000 {method 'all' of 'numpy.ndarray' objects}
5117 0.001 0.000 0.008 0.000 _methods.py:60(_all)
5117 0.001 0.000 0.001 0.000 {method 'ravel' of 'numpy.ndarray' objects}
10234 0.001 0.000 0.001 0.000 {built-in method numpy.asarray}
5117 0.001 0.000 0.001 0.000 linalg.py:112(isComplexType)
10234 0.001 0.000 0.001 0.000 {built-in method builtins.issubclass}
5117 0.001 0.000 0.001 0.000 multiarray.py:736(dot)
5117 0.000 0.000 0.000 0.000 linalg.py:2359(_norm_dispatcher)
1 0.000 0.000 8.043 8.043 {built-in method builtins.exec}
2 0.000 0.000 0.000 0.000 {built-in method numpy.zeros}
1 0.000 0.000 8.043 8.043 <string>:1(<module>)
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
%prun -T prof_jacobi_v1.txt solve_laplace(49, jacobi_v1)
print(open('prof_jacobi_v1.txt').read())
*** Profile printout saved to text file 'prof_jacobi_v1.txt'.
102346 function calls (97229 primitive calls) in 0.127 seconds
Ordered by: internal time
ncalls tottime percall cumtime percall filename:lineno(function)
5117 0.063 0.000 0.063 0.000 3378516824.py:21(jacobi_v1)
5117 0.011 0.000 0.021 0.000 linalg.py:2363(norm)
5117 0.009 0.000 0.019 0.000 function_base.py:422(asarray_chkfinite)
5117 0.007 0.000 0.007 0.000 3211324933.py:24(bc)
10234/5117 0.007 0.000 0.023 0.000 {built-in method numpy.core._multiarray_umath.implement_array_function}
5117 0.007 0.000 0.007 0.000 {method 'reduce' of 'numpy.ufunc' objects}
5117 0.006 0.000 0.052 0.000 _misc.py:17(norm)
1 0.005 0.005 0.127 0.127 3211324933.py:1(solve_laplace)
5117 0.003 0.000 0.026 0.000 <__array_function__ internals>:2(norm)
5117 0.002 0.000 0.008 0.000 <__array_function__ internals>:2(dot)
5117 0.001 0.000 0.009 0.000 {method 'all' of 'numpy.ndarray' objects}
5117 0.001 0.000 0.008 0.000 _methods.py:60(_all)
10234 0.001 0.000 0.001 0.000 {built-in method numpy.asarray}
5117 0.001 0.000 0.001 0.000 {method 'ravel' of 'numpy.ndarray' objects}
5117 0.001 0.000 0.001 0.000 linalg.py:112(isComplexType)
10234 0.001 0.000 0.001 0.000 {built-in method builtins.issubclass}
5117 0.001 0.000 0.001 0.000 multiarray.py:736(dot)
5117 0.001 0.000 0.001 0.000 linalg.py:2359(_norm_dispatcher)
2 0.000 0.000 0.000 0.000 {built-in method numpy.zeros}
1 0.000 0.000 0.127 0.127 {built-in method builtins.exec}
1 0.000 0.000 0.127 0.127 <string>:1(<module>)
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
Line profile#
lprof
를 이용해 라인별 계산 시간 측정
Note
line_profiler 패키지가 설치되어야 함
solver 함수가 Hot-spot임
# Line profiler magic key 호출
%load_ext line_profiler
%lprun -T lprof_jacobi.txt -f solve_laplace solve_laplace(49, jacobi)
print(open('lprof_jacobi.txt').read())
*** Profile printout saved to text file 'lprof_jacobi.txt'.
Timer unit: 1e-09 s
Total time: 10.427 s
File: /tmp/ipykernel_63391/3211324933.py
Function: solve_laplace at line 1
Line # Hits Time Per Hit % Time Line Contents
==============================================================
1 def solve_laplace(n, solver, tol=1e-5, order='C'):
2 """
3 Laplace Equation solver
4
5 Parameters
6 ----------
7 n : integer
8 size
9 solver : function
10 iterative solver
11 tol : float
12 tolerance
13 order : string
14 'C' | 'F'
15
16 Returns
17 -------
18 err : float
19 residual
20 """
21 1 10149.0 10149.0 0.0 ti = np.zeros((n+2, n+2), order=order)
22 1 19958.0 19958.0 0.0 dt = np.zeros((n+2, n+2), order=order)
23
24 1 290.0 290.0 0.0 def bc(t):
25 t[-1, 1:-1] = 300
26 t[0, 1:-1] = 100
27 t[1:-1, -1] = 100
28 t[1:-1, 0] = 100
29
30 1 110.0 110.0 0.0 err = 1
31 1 100.0 100.0 0.0 hist = []
32 5117 913867.0 178.6 0.0 while err > tol:
33 # Apply BC
34 5117 9189306.0 1795.8 0.1 bc(ti)
35
36 # Run Gauss-Seidel
37 5117 10365925356.0 2025781.8 99.4 solver(n, ti, dt)
38
39 # Compute Error
40 5117 50949347.0 9956.9 0.5 err = linalg.norm(dt) / n
41
42 1 80.0 80.0 0.0 return err
%lprun -T lprof_jacobi_inner.txt -f jacobi solve_laplace(49, jacobi)
print(open('lprof_jacobi_inner.txt').read())
*** Profile printout saved to text file 'lprof_jacobi_inner.txt'.
Timer unit: 1e-09 s
Total time: 12.3775 s
File: /tmp/ipykernel_63391/3378516824.py
Function: jacobi at line 1
Line # Hits Time Per Hit % Time Line Contents
==============================================================
1 def jacobi(n, ti, dt):
2 """
3 Jacobi method
4
5 Parameters
6 ----------
7 n : integer
8 size
9 ti : float
10 current time
11 dt : array
12 difference
13 """
14 250733 22336006.0 89.1 0.2 for i in range(1, n+1):
15 12285917 1153899534.0 93.9 9.3 for j in range(1, n+1):
16 12285917 11193868106.0 911.1 90.4 dt[i, j] = 0.25*(ti[i-1, j] + ti[i, j-1] + ti[i+1, j] + ti[i, j+1]) - ti[i, j]
17
18 # Update
19 5117 7381007.0 1442.4 0.1 ti += dt
%lprun -T lprof_jacobi_v1.txt -f solve_laplace solve_laplace(49, jacobi_v1)
print(open('lprof_jacobi_v1.txt').read())
*** Profile printout saved to text file 'lprof_jacobi_v1.txt'.
Timer unit: 1e-09 s
Total time: 0.124165 s
File: /tmp/ipykernel_63391/3211324933.py
Function: solve_laplace at line 1
Line # Hits Time Per Hit % Time Line Contents
==============================================================
1 def solve_laplace(n, solver, tol=1e-5, order='C'):
2 """
3 Laplace Equation solver
4
5 Parameters
6 ----------
7 n : integer
8 size
9 solver : function
10 iterative solver
11 tol : float
12 tolerance
13 order : string
14 'C' | 'F'
15
16 Returns
17 -------
18 err : float
19 residual
20 """
21 1 9899.0 9899.0 0.0 ti = np.zeros((n+2, n+2), order=order)
22 1 19947.0 19947.0 0.0 dt = np.zeros((n+2, n+2), order=order)
23
24 1 290.0 290.0 0.0 def bc(t):
25 t[-1, 1:-1] = 300
26 t[0, 1:-1] = 100
27 t[1:-1, -1] = 100
28 t[1:-1, 0] = 100
29
30 1 110.0 110.0 0.0 err = 1
31 1 110.0 110.0 0.0 hist = []
32 5117 906762.0 177.2 0.7 while err > tol:
33 # Apply BC
34 5117 9419200.0 1840.8 7.6 bc(ti)
35
36 # Run Gauss-Seidel
37 5117 64892267.0 12681.7 52.3 solver(n, ti, dt)
38
39 # Compute Error
40 5117 48916473.0 9559.6 39.4 err = linalg.norm(dt) / n
41
42 1 90.0 90.0 0.0 return err
%lprun -T lprof_jacobi_inner_v1.txt -f jacobi_v1 solve_laplace(49, jacobi_v1)
print(open('lprof_jacobi_inner_v1.txt').read())
*** Profile printout saved to text file 'lprof_jacobi_inner_v1.txt'.
Timer unit: 1e-09 s
Total time: 0.0713991 s
File: /tmp/ipykernel_63391/3378516824.py
Function: jacobi_v1 at line 21
Line # Hits Time Per Hit % Time Line Contents
==============================================================
21 def jacobi_v1(n, ti, dt):
22 """
23 Jacobi method (Vector version)
24
25 Parameters
26 ----------
27 ti : float
28 current time
29
30 Parameters
31 -----------
32 dt : array
33 difference
34 """
35 5117 64485401.0 12602.2 90.3 dt[1:-1, 1:-1] = 0.25*(ti[:-2, 1:-1] + ti[1:-1, :-2] + ti[2:, 1:-1] + ti[1:-1, 2:]) - ti[1:-1, 1:-1]
36
37 # Update
38 5117 6913738.0 1351.1 9.7 ti += dt
코드 최적화#
Hot-spot을 빠르게 구현할 방법을 고안
Python
For
loop가 매우 느려서 나타나는 상황임
가속화 #1
Fortran, C/C++ 함수를 구현한 후 파이썬에 연동
F2Py
ctypes
,Cython
,Cffi
등포트란 문법 참고
src="""
! Jacobi Method
subroutine jacobi(n, ti, dt)
implicit none
integer, intent(in) :: n
real(8), dimension(:, :), intent(inout) :: ti
real(8), dimension(:, :), intent(inout) :: dt
integer :: i, j
do j = 2, n+1
do i = 2, n+1
dt(i,j) = 0.25*(ti(i-1,j) + ti(i,j-1) + ti(i+1,j) + ti(i, j+1)) - ti(i,j)
enddo
enddo
! Update
do j = 1, n+2
do i = 1, n+2
ti(i,j) = ti(i,j) + dt(i,j)
enddo
enddo
end subroutine jacobi
"""
# Write f90 code
with open('fjacobi.f90', 'w') as f:
f.write(src)
# Make fortran module
!f2py -c -m fsolver fjacobi.f90
[39mrunning build[0m
[39mrunning config_cc[0m
[39munifing config_cc, config, build_clib, build_ext, build commands --compiler options[0m
[39mrunning config_fc[0m
[39munifing config_fc, config, build_clib, build_ext, build commands --fcompiler options[0m
[39mrunning build_src[0m
[39mbuild_src[0m
[39mbuilding extension "fsolver" sources[0m
[39mf2py options: [][0m
[39mf2py:> /tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fsolvermodule.c[0m
[39mcreating /tmp/tmpui9nfs8q/src.linux-x86_64-3.10[0m
Reading fortran codes...
Reading file 'fjacobi.f90' (format:free)
Post-processing...
Block: fsolver
Block: jacobi
Post-processing (stage 2)...
Building modules...
Building module "fsolver"...
Creating wrapper for Fortran subroutine "jacobi"("jacobi")...
Constructing wrapper function "jacobi"...
jacobi(n,ti,dt)
Wrote C/API module "fsolver" to file "/tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fsolvermodule.c"
Fortran 90 wrappers are saved to "/tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fsolver-f2pywrappers2.f90"
[39m adding '/tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fortranobject.c' to sources.[0m
[39m adding '/tmp/tmpui9nfs8q/src.linux-x86_64-3.10' to include_dirs.[0m
[39mcopying /usr/lib/python3/dist-packages/numpy/f2py/src/fortranobject.c -> /tmp/tmpui9nfs8q/src.linux-x86_64-3.10[0m
[39mcopying /usr/lib/python3/dist-packages/numpy/f2py/src/fortranobject.h -> /tmp/tmpui9nfs8q/src.linux-x86_64-3.10[0m
[39m adding '/tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fsolver-f2pywrappers2.f90' to sources.[0m
[39mbuild_src: building npy-pkg config files[0m
[39mrunning build_ext[0m
[39mcustomize UnixCCompiler[0m
[39mcustomize UnixCCompiler using build_ext[0m
[39mget_default_fcompiler: matching types: '['gnu95', 'intel', 'lahey', 'pg', 'nv', 'absoft', 'nag', 'vast', 'compaq', 'intele', 'intelem', 'gnu', 'g95', 'pathf95', 'nagfor', 'fujitsu']'[0m
[39mcustomize Gnu95FCompiler[0m
[39mFound executable /usr/bin/gfortran[0m
[39mcustomize Gnu95FCompiler[0m
[39mcustomize Gnu95FCompiler using build_ext[0m
[39mbuilding 'fsolver' extension[0m
[39mcompiling C sources[0m
[39mC compiler: x86_64-linux-gnu-gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -fPIC
[0m
[39mcreating /tmp/tmpui9nfs8q/tmp[0m
[39mcreating /tmp/tmpui9nfs8q/tmp/tmpui9nfs8q[0m
[39mcreating /tmp/tmpui9nfs8q/tmp/tmpui9nfs8q/src.linux-x86_64-3.10[0m
[39mcompile options: '-DNPY_DISABLE_OPTIMIZATION=1 -I/tmp/tmpui9nfs8q/src.linux-x86_64-3.10 -I/usr/lib/python3/dist-packages/numpy/core/include -I/usr/include/python3.10 -c'[0m
[39mx86_64-linux-gnu-gcc: /tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fsolvermodule.c[0m
[39mx86_64-linux-gnu-gcc: /tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fortranobject.c[0m
In file included from [01m[K/usr/lib/python3/dist-packages/numpy/core/include/numpy/ndarraytypes.h:1969[m[K,
from [01m[K/usr/lib/python3/dist-packages/numpy/core/include/numpy/ndarrayobject.h:12[m[K,
from [01m[K/usr/lib/python3/dist-packages/numpy/core/include/numpy/arrayobject.h:4[m[K,
from [01m[K/tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fortranobject.h:13[m[K,
from [01m[K/tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fortranobject.c:2[m[K:
[01m[K/usr/lib/python3/dist-packages/numpy/core/include/numpy/npy_1_7_deprecated_api.h:17:2:[m[K [01;35m[Kwarning: [m[K#warning "Using deprecated NumPy API, disable it with " "#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION" [[01;35m[K]8;;https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wcpp-Wcpp]8;;[m[K]
17 | #[01;35m[Kwarning[m[K "Using deprecated NumPy API, disable it with " \
| [01;35m[K^~~~~~~[m[K
In file included from [01m[K/usr/lib/python3/dist-packages/numpy/core/include/numpy/ndarraytypes.h:1969[m[K,
from [01m[K/usr/lib/python3/dist-packages/numpy/core/include/numpy/ndarrayobject.h:12[m[K,
from [01m[K/usr/lib/python3/dist-packages/numpy/core/include/numpy/arrayobject.h:4[m[K,
from [01m[K/tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fortranobject.h:13[m[K,
from [01m[K/tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fsolvermodule.c:16[m[K:
[01m[K/usr/lib/python3/dist-packages/numpy/core/include/numpy/npy_1_7_deprecated_api.h:17:2:[m[K [01;35m[Kwarning: [m[K#warning "Using deprecated NumPy API, disable it with " "#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION" [[01;35m[K]8;;https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wcpp-Wcpp]8;;[m[K]
17 | #[01;35m[Kwarning[m[K "Using deprecated NumPy API, disable it with " \
| [01;35m[K^~~~~~~[m[K
[01m[K/tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fsolvermodule.c:137:12:[m[K [01;35m[Kwarning: [m[K‘[01m[Kf2py_size[m[K’ defined but not used [[01;35m[K]8;;https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wunused-function-Wunused-function]8;;[m[K]
137 | static int [01;35m[Kf2py_size[m[K(PyArrayObject* var, ...)
| [01;35m[K^~~~~~~~~[m[K
[39mcompiling Fortran sources[0m
[39mFortran f77 compiler: /usr/bin/gfortran -Wall -g -ffixed-form -fno-second-underscore -fPIC -O3 -funroll-loops
Fortran f90 compiler: /usr/bin/gfortran -Wall -g -fno-second-underscore -fPIC -O3 -funroll-loops
Fortran fix compiler: /usr/bin/gfortran -Wall -g -ffixed-form -fno-second-underscore -Wall -g -fno-second-underscore -fPIC -O3 -funroll-loops[0m
[39mcompile options: '-I/tmp/tmpui9nfs8q/src.linux-x86_64-3.10 -I/usr/lib/python3/dist-packages/numpy/core/include -I/usr/include/python3.10 -c'[0m
[39mgfortran:f90: fjacobi.f90[0m
[39mgfortran:f90: /tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fsolver-f2pywrappers2.f90[0m
[39m/usr/bin/gfortran -Wall -g -Wall -g -shared /tmp/tmpui9nfs8q/tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fsolvermodule.o /tmp/tmpui9nfs8q/tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fortranobject.o /tmp/tmpui9nfs8q/fjacobi.o /tmp/tmpui9nfs8q/tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fsolver-f2pywrappers2.o -L/usr/lib/gcc/x86_64-linux-gnu/11 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -o ./fsolver.cpython-310-x86_64-linux-gnu.so[0m
Removing build directory /tmp/tmpui9nfs8q
# Import Fortran Jacobi solver
from fsolver import jacobi as fjacobi
%time solve_laplace(49, fjacobi, order='F')
CPU times: user 59.1 ms, sys: 16 ms, total: 75.1 ms
Wall time: 68.8 ms
9.997943770878389e-06
%prun -T prof_fjacobi.txt solve_laplace(49, fjacobi, order='F')
print(open('prof_fjacobi.txt').read())
*** Profile printout saved to text file 'prof_fjacobi.txt'.
97229 function calls (92112 primitive calls) in 0.098 seconds
Ordered by: internal time
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.017 0.017 0.098 0.098 3211324933.py:1(solve_laplace)
5117 0.014 0.000 0.029 0.000 linalg.py:2363(norm)
5117 0.013 0.000 0.027 0.000 function_base.py:422(asarray_chkfinite)
5117 0.010 0.000 0.010 0.000 3211324933.py:24(bc)
5117 0.010 0.000 0.010 0.000 {method 'reduce' of 'numpy.ufunc' objects}
10234/5117 0.010 0.000 0.031 0.000 {built-in method numpy.core._multiarray_umath.implement_array_function}
5117 0.008 0.000 0.071 0.000 _misc.py:17(norm)
5117 0.003 0.000 0.035 0.000 <__array_function__ internals>:2(norm)
5117 0.002 0.000 0.011 0.000 <__array_function__ internals>:2(dot)
5117 0.002 0.000 0.013 0.000 {method 'all' of 'numpy.ndarray' objects}
5117 0.001 0.000 0.012 0.000 _methods.py:60(_all)
10234 0.001 0.000 0.001 0.000 {built-in method numpy.asarray}
5117 0.001 0.000 0.001 0.000 {method 'ravel' of 'numpy.ndarray' objects}
5117 0.001 0.000 0.001 0.000 linalg.py:112(isComplexType)
10234 0.001 0.000 0.001 0.000 {built-in method builtins.issubclass}
5117 0.001 0.000 0.001 0.000 multiarray.py:736(dot)
5117 0.001 0.000 0.001 0.000 linalg.py:2359(_norm_dispatcher)
2 0.000 0.000 0.000 0.000 {built-in method numpy.zeros}
1 0.000 0.000 0.098 0.098 {built-in method builtins.exec}
1 0.000 0.000 0.098 0.098 <string>:1(<module>)
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
%lprun -T lprof_fjacobi.txt -f solve_laplace solve_laplace(49, fjacobi, order='F')
print(open('lprof_fjacobi.txt').read())
*** Profile printout saved to text file 'lprof_fjacobi.txt'.
Timer unit: 1e-09 s
Total time: 0.114883 s
File: /tmp/ipykernel_63391/3211324933.py
Function: solve_laplace at line 1
Line # Hits Time Per Hit % Time Line Contents
==============================================================
1 def solve_laplace(n, solver, tol=1e-5, order='C'):
2 """
3 Laplace Equation solver
4
5 Parameters
6 ----------
7 n : integer
8 size
9 solver : function
10 iterative solver
11 tol : float
12 tolerance
13 order : string
14 'C' | 'F'
15
16 Returns
17 -------
18 err : float
19 residual
20 """
21 1 10970.0 10970.0 0.0 ti = np.zeros((n+2, n+2), order=order)
22 1 36369.0 36369.0 0.0 dt = np.zeros((n+2, n+2), order=order)
23
24 1 341.0 341.0 0.0 def bc(t):
25 t[-1, 1:-1] = 300
26 t[0, 1:-1] = 100
27 t[1:-1, -1] = 100
28 t[1:-1, 0] = 100
29
30 1 120.0 120.0 0.0 err = 1
31 1 120.0 120.0 0.0 hist = []
32 5117 1210287.0 236.5 1.1 while err > tol:
33 # Apply BC
34 5117 12035320.0 2352.0 10.5 bc(ti)
35
36 # Run Gauss-Seidel
37 5117 12221519.0 2388.4 10.6 solver(n, ti, dt)
38
39 # Compute Error
40 5117 89368076.0 17464.9 77.8 err = linalg.norm(dt) / n
41
42 1 100.0 100.0 0.0 return err
가속화 #2
Numba 등 파이썬 가속 패키지 활용
파이썬 코드를 LLVM toolchain을 이용하여 가속화
Just-in Time (JIT) complication
import numba as nb
@nb.njit(fastmath=True)
def jacobi_nb(n, ti, dt):
"""
Jacobi method
Parameters
----------
n : integer
size
ti : float
current time
dt : array
difference
"""
for i in range(1, n+1):
for j in range(1, n+1):
dt[i, j] = 0.25*(ti[i-1, j] + ti[i, j-1] + ti[i+1, j] + ti[i, j+1]) - ti[i, j]
# Update
ti += dt
%time solve_laplace(49, jacobi_nb)
CPU times: user 197 ms, sys: 4 ms, total: 202 ms
Wall time: 201 ms
9.997943770878389e-06
%prun -T prof_jacobi_nb.txt solve_laplace(49, jacobi_nb)
print(open('prof_jacobi_nb.txt').read())
*** Profile printout saved to text file 'prof_jacobi_nb.txt'.
102346 function calls (97229 primitive calls) in 0.106 seconds
Ordered by: internal time
ncalls tottime percall cumtime percall filename:lineno(function)
5117 0.031 0.000 0.031 0.000 3143608721.py:1(jacobi_nb)
5117 0.012 0.000 0.024 0.000 linalg.py:2363(norm)
5117 0.011 0.000 0.022 0.000 function_base.py:422(asarray_chkfinite)
10234/5117 0.009 0.000 0.026 0.000 {built-in method numpy.core._multiarray_umath.implement_array_function}
1 0.009 0.009 0.106 0.106 3211324933.py:1(solve_laplace)
5117 0.008 0.000 0.008 0.000 3211324933.py:24(bc)
5117 0.008 0.000 0.008 0.000 {method 'reduce' of 'numpy.ufunc' objects}
5117 0.007 0.000 0.058 0.000 _misc.py:17(norm)
5117 0.003 0.000 0.030 0.000 <__array_function__ internals>:2(norm)
5117 0.002 0.000 0.009 0.000 <__array_function__ internals>:2(dot)
5117 0.001 0.000 0.011 0.000 {method 'all' of 'numpy.ndarray' objects}
5117 0.001 0.000 0.009 0.000 _methods.py:60(_all)
10234 0.001 0.000 0.001 0.000 {built-in method numpy.asarray}
5117 0.001 0.000 0.001 0.000 {method 'ravel' of 'numpy.ndarray' objects}
5117 0.001 0.000 0.001 0.000 linalg.py:112(isComplexType)
10234 0.001 0.000 0.001 0.000 {built-in method builtins.issubclass}
5117 0.001 0.000 0.001 0.000 linalg.py:2359(_norm_dispatcher)
5117 0.001 0.000 0.001 0.000 multiarray.py:736(dot)
2 0.000 0.000 0.000 0.000 {built-in method numpy.zeros}
1 0.000 0.000 0.106 0.106 {built-in method builtins.exec}
1 0.000 0.000 0.106 0.106 <string>:1(<module>)
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
%lprun -T lprof_jacobi_nb.txt -f solve_laplace solve_laplace(49, jacobi_nb)
print(open('lprof_jacobi_nb.txt').read())
*** Profile printout saved to text file 'lprof_jacobi_nb.txt'.
Timer unit: 1e-09 s
Total time: 0.126012 s
File: /tmp/ipykernel_63391/3211324933.py
Function: solve_laplace at line 1
Line # Hits Time Per Hit % Time Line Contents
==============================================================
1 def solve_laplace(n, solver, tol=1e-5, order='C'):
2 """
3 Laplace Equation solver
4
5 Parameters
6 ----------
7 n : integer
8 size
9 solver : function
10 iterative solver
11 tol : float
12 tolerance
13 order : string
14 'C' | 'F'
15
16 Returns
17 -------
18 err : float
19 residual
20 """
21 1 17343.0 17343.0 0.0 ti = np.zeros((n+2, n+2), order=order)
22 1 21340.0 21340.0 0.0 dt = np.zeros((n+2, n+2), order=order)
23
24 1 661.0 661.0 0.0 def bc(t):
25 t[-1, 1:-1] = 300
26 t[0, 1:-1] = 100
27 t[1:-1, -1] = 100
28 t[1:-1, 0] = 100
29
30 1 281.0 281.0 0.0 err = 1
31 1 260.0 260.0 0.0 hist = []
32 5117 1200012.0 234.5 1.0 while err > tol:
33 # Apply BC
34 5117 11668397.0 2280.3 9.3 bc(ti)
35
36 # Run Gauss-Seidel
37 5117 33923833.0 6629.6 26.9 solver(n, ti, dt)
38
39 # Compute Error
40 5117 79179650.0 15473.8 62.8 err = linalg.norm(dt) / n
41
42 1 100.0 100.0 0.0 return err
좀 더 큰 문제 시간 비교#
Pure Python 보다 현격하게 빠름
Fortran 결과가 더 빠르나, 이는 포트란 컴파일러와 LLVM 컴파일러 버전에 따라 다를 수 있음
n = 199
%time solve_laplace(n, jacobi_v1)
%time solve_laplace(n, fjacobi, order='F')
%time solve_laplace(n, jacobi_nb)
CPU times: user 2min 9s, sys: 1.94 s, total: 2min 11s
Wall time: 8.22 s
CPU times: user 1min 4s, sys: 840 ms, total: 1min 4s
Wall time: 4.06 s
CPU times: user 1min 46s, sys: 1.42 s, total: 1min 48s
Wall time: 6.77 s
9.999287364322627e-06